import pandas as pd
import numpy as np
from scipy.special import expit  # Sigmoid 函数

# —————————————————————————
# 1. 读取全体报道基线数据
# —————————————————————————
full_file = "Monthly_Indices_Full.xlsx"
both = pd.read_excel(full_file, engine="openpyxl")

both['Month'] = pd.to_datetime(both['Month'], format='%Y-%m').dt.to_period('M')
both.set_index('Month', inplace=True)

# —————————————————————————
# 2. 计算“Balance / Impact / Tone”的 σ-NMAE
# —————————————————————————
metrics = ['BalanceIndex','ImpactIndex','ToneIndex']
baseline_metrics = both[metrics]
stds = baseline_metrics.std()   # 仍然使用 σ 归一化，保持可比性

media_file = "Monthly_Indices_50_Media.xlsx"
xls = pd.ExcelFile(media_file, engine="openpyxl")

records = []
for media in xls.sheet_names:
    dfm = pd.read_excel(xls, sheet_name=media, engine="openpyxl")
    dfm['Month'] = pd.to_datetime(dfm['Month'], format='%Y-%m').dt.to_period('M')
    dfm.set_index('Month', inplace=True)

    df_join = baseline_metrics.join(dfm[metrics], how='inner', rsuffix='_med')
    rec = {'Media': media}

    for m in metrics:
        diff = df_join[f'{m}_med'] - df_join[m]
        mae = np.abs(diff).mean()   # ⭐ 核心修改：RMSE → MAE
        rec[f'NMAE_{m}'] = mae / stds[m] if stds[m] != 0 else np.nan

    records.append(rec)

nmae_df = pd.DataFrame(records).set_index('Media')

# —————————————————————————
# 3. 对 TotalEvents 计算 σ-NMAE
# —————————————————————————
baseline_events = both['TotalEvents']
sigma_events = baseline_events.std()

ev_records = []
for media in xls.sheet_names:
    dfm = pd.read_excel(xls, sheet_name=media, engine="openpyxl")
    dfm['Month'] = pd.to_datetime(dfm['Month'], format='%Y-%m').dt.to_period('M')
    dfm.set_index('Month', inplace=True)

    aligned = pd.concat([baseline_events, dfm['TotalEvents']], axis=1, join='inner')
    aligned.columns = ['base','med']

    mae_ev = np.abs(aligned['med'] - aligned['base']).mean()  # ⭐ MAE
    ev_records.append({
        'Media': media,
        'NMAE_EventCount': mae_ev / sigma_events if sigma_events != 0 else np.nan
    })

ev_nmae_df = pd.DataFrame(ev_records).set_index('Media')

# —————————————————————————
# 4. 合并 MAE 指标
# —————————————————————————
all_df = nmae_df.join(ev_nmae_df, how='inner')

# —————————————————————————
# 5. Z-Score 标准化 + Sigmoid 映射
# —————————————————————————
mapped = all_df.copy()
for col in all_df.columns:
    mu = all_df[col].mean()
    sd = all_df[col].std()
    z = (all_df[col] - mu) / sd
    mapped[col.replace('NMAE_', 'SigmoidDeviation_')] = expit(z)

# —————————————————————————
# 6. 输出
# —————————————————————————
output_cols = [
    'SigmoidDeviation_BalanceIndex',
    'SigmoidDeviation_ImpactIndex',
    'SigmoidDeviation_ToneIndex',
    'SigmoidDeviation_EventCount'
]

mapped[output_cols].to_excel("Media_4Metrics_SigmoidDeviation_MAE.xlsx")

print("✅ 已生成：Media_4Metrics_SigmoidDeviation_MAE.xlsx")
